/*
* Copyright 2011-2014 Proofpoint, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.proofpoint.event.collector.combiner;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.hash.Hashing;
import com.google.common.io.ByteSource;
import com.google.common.io.Closeables;
import com.google.common.io.CountingOutputStream;
import com.google.common.io.Files;
import com.proofpoint.event.client.InMemoryEventClient;
import com.proofpoint.event.collector.EventPartition;
import com.proofpoint.json.JsonCodec;
import com.proofpoint.units.DataSize;
import org.joda.time.DateTime;
import org.joda.time.format.ISODateTimeFormat;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Parameters;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import static com.google.common.collect.Maps.newHashMap;
import static com.google.common.collect.Maps.newTreeMap;
import static com.proofpoint.event.collector.combiner.S3StorageHelper.getS3ObjectKey;
import static com.proofpoint.event.collector.combiner.StoredObject.GET_LOCATION_FUNCTION;
import static org.apache.commons.codec.binary.Hex.encodeHexString;
import static org.joda.time.DateTimeZone.UTC;
@Test(groups = "aws")
public class TestS3Combine
{
private static final int TIME_SLICE_DAYS_AGO = 2;
private static final int START_DAYS_AGO = TIME_SLICE_DAYS_AGO + 10;
private static final int END_DAYS_AGO = 0;
private static final String EVENT_TYPE = "TestEvent";
private static final String TIME_SLICE = DateTime.now(UTC).withTimeAtStartOfDay().minusDays(TIME_SLICE_DAYS_AGO).toString(ISODateTimeFormat.date().withZone(UTC));
private static final int MIN_LARGE_FILE_LENGTH = 5 * 1024 * 1024;
private static final int MIN_SMALL_FILE_LENGTH = 10 * 1024;
private static final String HOUR = "08";
private static final Function<StoredObject, KeyVersion> KEY_VERSION_FROM_STORED_OBJECT_TRANSFORMER = new Function<StoredObject, KeyVersion>()
{
@Override
public KeyVersion apply(StoredObject storedObject)
{
return new KeyVersion(getS3ObjectKey(storedObject.getLocation()));
}
};
private String testBucket;
private AmazonS3 service;
private TransferManager transferManager;
private StoredObjectCombiner objectCombiner;
private URI testBaseUri;
private URI stagingBaseUri;
private URI targetBaseUri;
private S3StorageSystem storageSystem;
private TestingCombineObjectMetadataStore metadataStore;
private InMemoryEventClient eventClient;
@BeforeClass
@Parameters({"aws-credentials-file", "aws-test-bucket"})
public void setUpClass(String awsCredentialsFile, String awsTestBucket)
throws Exception
{
String credentialsJson = Files.toString(new File(awsCredentialsFile), Charsets.UTF_8);
Map<String, String> map = JsonCodec.mapJsonCodec(String.class, String.class).fromJson(credentialsJson);
String awsAccessKey = map.get("access-id");
String awsSecretKey = map.get("private-key");
AWSCredentials awsCredentials = new BasicAWSCredentials(awsAccessKey, awsSecretKey);
service = new AmazonS3Client(awsCredentials);
transferManager = new TransferManager(awsCredentials);
testBucket = awsTestBucket;
if (!service.doesBucketExist(testBucket)) {
service.createBucket(testBucket);
}
}
@BeforeMethod
public void setUpMethod()
{
String randomPart = "CombineTest-" + UUID.randomUUID().toString().replace("-", "");
testBaseUri = S3StorageHelper.buildS3Location("s3://", testBucket, randomPart);
stagingBaseUri = S3StorageHelper.buildS3Location(testBaseUri, "staging/");
targetBaseUri = S3StorageHelper.buildS3Location(testBaseUri, "target/");
eventClient = new InMemoryEventClient();
storageSystem = new S3StorageSystem(service, transferManager);
metadataStore = new TestingCombineObjectMetadataStore();
objectCombiner = new StoredObjectCombiner("test",
metadataStore,
storageSystem,
eventClient,
stagingBaseUri,
targetBaseUri,
new DataSize(512, DataSize.Unit.MEGABYTE),
START_DAYS_AGO,
END_DAYS_AGO,
"testGroup");
}
@AfterMethod
public void tearDownMethod()
{
DeleteObjectsRequest request = new DeleteObjectsRequest(testBucket).withKeys(findKeysToDelete(testBaseUri));
service.deleteObjects(request);
}
@Test
public void testLargeCombine()
throws Exception
{
EventPartition eventPartition = new EventPartition(EVENT_TYPE, TIME_SLICE, HOUR);
String sizeName = "large";
URI groupPrefix = S3StorageHelper.buildS3Location(targetBaseUri, EVENT_TYPE, TIME_SLICE, HOUR + ".large");
URI target = S3StorageHelper.appendSuffix(groupPrefix, "00000.json.snappy");
// upload two 5 MB files
String base = UUID.randomUUID().toString().replace("-", "");
Map<URI, ByteSource> files = newHashMap();
for (int i = 0; i < 2; i++) {
URI name = createStagingFileName(base, i + 10);
File file = uploadFile(name, MIN_LARGE_FILE_LENGTH);
files.put(name, Files.asByteSource(file));
}
// combine the files
objectCombiner.combineAllObjects();
// verify the contents
ByteSource s3InputSupplier = storageSystem.getInputSupplier(target);
ByteSource combinedInputs = getCombinedInputsSupplier(eventPartition, sizeName, files, target);
if (!combinedInputs.contentEquals(s3InputSupplier)) {
Assert.fail("broken");
}
// upload two more chunks
for (int i = 0; i < 2; i++) {
URI name = createStagingFileName(base, i);
File file = uploadFile(name, MIN_LARGE_FILE_LENGTH);
files.put(name, Files.asByteSource(file));
}
// combine the files
objectCombiner.combineAllObjects();
// verify the contents
s3InputSupplier = storageSystem.getInputSupplier(target);
combinedInputs = getCombinedInputsSupplier(eventPartition, sizeName, files, target);
if (!combinedInputs.contentEquals(s3InputSupplier)) {
Assert.fail("broken");
}
// verify version combiner doesn't recombine unchanged files
CombinedGroup combinedObjectManifest = metadataStore.getCombinedGroupManifest(eventPartition, sizeName);
long currentVersion = combinedObjectManifest.getVersion();
objectCombiner.combineAllObjects();
CombinedGroup newCombinedStoredObjectManifest = metadataStore.getCombinedGroupManifest(eventPartition, sizeName);
Assert.assertEquals(newCombinedStoredObjectManifest.getVersion(), currentVersion);
// verify that events were fired
Assert.assertEquals(eventClient.getEvents().size(), 3);
}
@Test
public void testSmallCombine()
throws Exception
{
EventPartition eventPartition = new EventPartition(EVENT_TYPE, TIME_SLICE, HOUR);
String sizeName = "small";
URI groupPrefix = S3StorageHelper.buildS3Location(targetBaseUri, EVENT_TYPE, TIME_SLICE, HOUR + ".small");
URI target = S3StorageHelper.appendSuffix(groupPrefix, "00000.json.snappy");
// upload two 10 KB file for to each name
String base = UUID.randomUUID().toString().replace("-", "");
Map<URI, ByteSource> files = newHashMap();
for (int i = 0; i < 2; i++) {
URI name = createStagingFileName(base, i + 10);
File file = uploadFile(name, MIN_SMALL_FILE_LENGTH);
files.put(name, Files.asByteSource(file));
}
// combine the files
objectCombiner.combineAllObjects();
// verify the contents
StoredObject combinedObject = storageSystem.getObjectDetails(target);
ByteSource combinedInputs = getCombinedInputsSupplier(eventPartition, sizeName, files, target);
String sourceMD5 = encodeHexString(combinedInputs.hash(Hashing.md5()).asBytes());
if (!sourceMD5.equals(combinedObject.getETag())) {
Assert.fail("broken");
}
// upload two more chunks
for (int i = 0; i < 2; i++) {
URI name = createStagingFileName(base, i);
File file = uploadFile(name, MIN_SMALL_FILE_LENGTH);
files.put(name, Files.asByteSource(file));
}
// combine the files
objectCombiner.combineAllObjects();
// verify the contents
combinedObject = storageSystem.getObjectDetails(target);
combinedInputs = getCombinedInputsSupplier(eventPartition, sizeName, files, target);
sourceMD5 = encodeHexString(combinedInputs.hash(Hashing.md5()).asBytes());
if (!sourceMD5.equals(combinedObject.getETag())) {
Assert.fail("broken");
}
// verify version combiner doesn't recombine unchanged files
CombinedGroup combinedObjectManifest = metadataStore.getCombinedGroupManifest(eventPartition, sizeName);
long currentVersion = combinedObjectManifest.getVersion();
objectCombiner.combineAllObjects();
CombinedGroup newCombinedStoredObjectManifest = metadataStore.getCombinedGroupManifest(eventPartition, sizeName);
Assert.assertEquals(newCombinedStoredObjectManifest.getVersion(), currentVersion);
// verify that events were fired
Assert.assertEquals(eventClient.getEvents().size(), 3);
}
private ByteSource getCombinedInputsSupplier(EventPartition eventPartition, String sizeName, Map<URI, ByteSource> files, URI target)
{
// get the manifest for the group prefix
CombinedGroup combinedObjectManifest = metadataStore.getCombinedGroupManifest(eventPartition, sizeName);
// get the combined stored object for the target
CombinedStoredObject combinedObject = combinedObjectManifest.getCombinedObject(target);
Assert.assertNotNull(combinedObject);
// get the locations of each part (in order)
List<URI> sourcePartsLocation = Lists.transform(combinedObject.getSourceParts(), GET_LOCATION_FUNCTION);
// sort the supplied files map based on this explicit order
Map<URI, ByteSource> parts = newTreeMap(Ordering.explicit(sourcePartsLocation));
parts.putAll(files);
// join the parts
return ByteSource.concat(parts.values());
}
private URI createStagingFileName(String base, int i)
{
return S3StorageHelper.buildS3Location(stagingBaseUri, EVENT_TYPE, TIME_SLICE, HOUR, String.format("part-%s-%04d", base, i));
}
private File uploadFile(URI location, int minFileLength)
throws IOException
{
File tempFile = File.createTempFile(S3StorageHelper.getS3FileName(location), ".s3.data");
CountingOutputStream countingOutputStream = null;
try {
// write contents to a temp file
countingOutputStream = new CountingOutputStream(new FileOutputStream(tempFile));
while (countingOutputStream.getCount() < minFileLength) {
String line = "This is object " + location + " at offset " + countingOutputStream.getCount() + "\n";
countingOutputStream.write(line.getBytes(Charsets.UTF_8));
}
countingOutputStream.flush();
countingOutputStream.close();
// upload
final StoredObject target = new StoredObject(location);
storageSystem.putObject(target.getLocation(), tempFile);
}
catch (Throwable t) {
Closeables.close(countingOutputStream, true);
tempFile.delete();
throw Throwables.propagate(t);
}
return tempFile;
}
private List<KeyVersion> findKeysToDelete(URI uri)
{
ImmutableList.Builder resultBuilder = ImmutableList.builder();
findKeysToDelete(resultBuilder, uri);
return resultBuilder.build();
}
private void findKeysToDelete(ImmutableList.Builder resultBuilder, URI uri)
{
resultBuilder.addAll(Lists.transform(storageSystem.listObjects(uri), KEY_VERSION_FROM_STORED_OBJECT_TRANSFORMER));
for (URI subdir : storageSystem.listDirectories(uri)) {
findKeysToDelete(resultBuilder, subdir);
}
}
}